
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
# Train a random-forest classifier on the red-wine quality data, tune it with
# a cross-validated grid search, report accuracy on a held-out split, and
# write predictions for a second, unlabelled file.

# Labelled data, read from a local ';'-separated CSV (not from
# sklearn.datasets). Every column except "quality" is used as a feature.
data1 = pd.read_csv('winequality-red.csv', sep=';')
# Data to predict on.
# NOTE(review): assumed to contain exactly the feature columns of data1
# (i.e. no "quality" column) -- confirm against the actual file.
data2 = pd.read_csv('估价数据.csv', sep=';')
x_train = data1.drop(["quality"], axis=1)
y_train = data1["quality"]

# Split the labelled data into a training part (used for the grid search)
# and a test part (used only for the final score). Seeded for repeatability.
X_train, X_test, Y_train, Y_test = train_test_split(
    x_train, y_train, random_state=0
)

param_grid = {
    # Number of trees in the forest.
    'n_estimators': [5, 10, 20, 50, 100, 200],
    # Maximum tree depth; very deep trees tend to overfit.
    'max_depth': [3, 5, 7, 15, 20],
    # Fraction of the features considered at each split. The last entry must
    # be the float 1.0 ("all features"): scikit-learn interprets the int 1 as
    # "exactly one feature per split", which is not what this grid intends.
    'max_features': [0.6, 0.7, 0.8, 1.0],
}
# Seed the forest as well, so the selected hyper-parameters, the printed
# score and the predictions are the same from run to run.
rf = RandomForestClassifier(random_state=0)
grid = GridSearchCV(rf, param_grid=param_grid, cv=3)

x_test = data2
grid.fit(X_train, Y_train)
# Best parameter combination found by the 3-fold search.
rf_reg = grid.best_estimator_
print(rf_reg)

# Accuracy of the selected model on the held-out test split.
score = rf_reg.score(X_test, Y_test)
print(score)

# Predict for the new data and save the result. The output column is called
# "price" although the model predicts the "quality" label; the name is kept
# so that consumers of the output file are unaffected.
submission = {"price": rf_reg.predict(x_test)}
submission = pd.DataFrame(submission)
submission.to_csv('结果.csv')